Setup

knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

# Load libraries
library(tidyverse)
library(cowplot)
library(ggplot2)
library(ggpattern)
library(ggsci)
library(utils)
library(reshape2)
library(stringr)
library(gridGraphics)
library(zellkonverter)
library(SingleCellExperiment)
library(cytomapper)
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th152"
# analysis.path <- "/mnt/bb_dqbm_volume/analysis/Immucan_lung"

u_stability.path <- file.path(analysis.path, "tests/test_U_stability")
u_stability_par.path <- file.path(analysis.path, "tests/test_U_stability_par")
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")

Training Data Size

Read in results from the stability analysis of U using different training data sizes (U_stability.ipynb). We then look at the correlations between each U and the ‘ground truth’, eg. the U computed from the largest training test using the [Mantel test] (https://tiagoolivoto.github.io/metan/reference/mantel_test.html).

# Read .csv file
u_stability.list <- lapply(list.files(u_stability.path, "results.csv", full.names=TRUE, 
                                    recursive=TRUE), function(file){
  k_name <- gsub("k_", "", str_split(file, "/")[[1]][8])
  read_csv(file)[-1] %>% mutate(k=k_name)
})

# Read in segmentation SCE results
u_stability_modules.files <- list.files(u_stability.path, "gene_modules.csv",
                                        full.names=TRUE, recursive=TRUE)
u_stability.modules <- lapply(u_stability_modules.files, read_U, type="size") %>% bind_rows()

Mantel Test for k=3 and k=1 (d=80, iter=10, paper_norm)

# Calculate mantel test for each U compared to 'ground truth' U of average U
# using the full data set for all tested k
u_stability.mantel <- lapply(unique(u_stability.modules$k), function(k_iter){
  # Empty list to save correlation matrices
  u_stability.cor <- list()
  
  # Go over every training size and replicate
  for(s in unique(u_stability.modules$size)){
    temp_list <-  list()
    for(r in unique(u_stability.modules$rep)) {
      res.temp <- u_stability.modules %>%
        dplyr::filter(rep==r & k==k_iter & size==s) %>%
        dplyr::select(-c("rep", "k", "size")) %>%
        pivot_wider(names_from = variable, values_from = value) %>%
        column_to_rownames(var="protein") %>%
        as.matrix()
      
      # Calculate correlation matrix
      temp_list[[length(temp_list)+1]] <- cor(t(res.temp))
    }
    
    u_stability.cor[[length(u_stability.cor)+1]] <- temp_list
  }
  names(u_stability.cor) <- unique(u_stability.modules$size)
  
  # Ground truth
  ground_truth.size <- as.character(max(as.integer(names(u_stability.cor))))
  ground_truth <- Reduce('+', u_stability.cor[[ground_truth.size]]) /
    length(u_stability.cor[[ground_truth.size]])
  
  # Compute mantel test for every U
  mantel_temp <- rapply(u_stability.cor, 
                        function(x, gt) {mantel_test(x, gt)$mantel_r}, 
                        how="list", gt=ground_truth)
  mantel_temp <- as.data.frame(do.call(cbind, mantel_temp)) %>%
    mutate(k=k_iter) %>%
    mutate_all(unlist)

  mantel_temp
}) %>% bind_rows() %>%
  melt(id.vars="k") %>%
  dplyr::rename(size=variable,
                mantel_cor=value) 

u_stability.mantel <- u_stability.mantel %>%
  mutate(size=strtoi(u_stability.mantel$size),
         mantel_cor=1-mantel_cor)

# Plot stability of U using mantel test results
u_stability.mantelPlot <- ggplot(u_stability.mantel, aes(size, mantel_cor)) +
  geom_point(aes(color=k, fill=k)) +
  geom_smooth(aes(color=k, fill=k), method="lm", formula=y ~ poly(x, 2)) +
  theme_cowplot() +
  scale_color_npg() +
  labs(x="Training size", y=expression(1 - cor["Mantel"]))
u_stability.mantelPlot

SMAF Parameters

Read in results from the stability analysis of U computed using different parameters (U_stability.ipynb). We then look at the heatmap of U and the pairwise correlations between U’s using the Mantel test.

k (W sparsity) (d=80, iter=10, paper_norm)

# Define path for results of different k's (sparsity)
u_stability_k.path <- file.path(u_stability_par.path, "k")

# Read in segmentation SCE results
u_stability_modules_k.files <- list.files(u_stability_k.path, 'gene_modules.csv',
                                         full.names=TRUE, recursive=TRUE)
u_stability_k.modules <- lapply(u_stability_modules_k.files, read_U, type="par") %>% bind_rows()

Heatmap of U for different k’s

# Plot U for different k's
u_stability_k.cor <- plot_U(u_stability_k.modules, "k", "rep")
k = 1

k = 10

k = 2

k = 3

k = 4

k = 5

k = 6

k = 7

k = 8

k = 9

Mantel test for different k’s

plot_metan_mantel(u_stability_k.cor, "k")
k = 1

k = 10

k = 2

k = 3

k = 4

k = 5

k = 6

k = 7

k = 8

k = 9

The boxplot shows a summary of the above mantel test results, by only looking at the pairwise correlations between any two matrices for each k.

u_stability_k.mantelBoxplot <- plot_mantel_boxplot(u_stability_k.cor, "k")
print(u_stability_k.mantelBoxplot)

maxItr (number of iterations to compute U) (k=3, d=80, paper_norm)

# Define path for results of different k's (sparsity)
u_stability_maxItr.path <- file.path(u_stability_par.path, "maxItr")

# Read in segmentation SCE results
u_stability_modules_maxItr.files <- list.files(u_stability_maxItr.path, 'gene_modules.csv',
                                               full.names=TRUE, recursive=TRUE)
u_stability_modules_maxItr.modules <- lapply(u_stability_modules_maxItr.files, read_U, type="par") %>% 
  bind_rows() %>%
  dplyr::rename(maxItr=k)

Heatmap of U for different maxItr

# Plot U for different maxItr
u_stability_maxItr.cor <- plot_U(u_stability_modules_maxItr.modules, "maxItr", "rep")
maxItr = 10

maxItr = 20

maxItr = 30

maxItr = 40

maxItr = 50

Mantel test for different maxItr

plot_metan_mantel(u_stability_maxItr.cor, "maxItr")
maxItr = 10

maxItr = 20

maxItr = 30

maxItr = 40

maxItr = 50

The boxplot shows a summary of the above mantel test results, by only looking at the pairwise correlations between any two matrices for each maxItr.

u_stability_maxItr.mantelBoxplot <- plot_mantel_boxplot(u_stability_maxItr.cor, "maxItr")
print(u_stability_maxItr.mantelBoxplot)

Transformation of X (before SMAF training) (k=1, d=80, iter=10)

# Define path for results of different k's (sparsity)
u_stability_xform.path <- file.path(u_stability_par.path, "transformation")

# Read in segmentation SCE results
u_stability_modules_xform.files <- list.files(u_stability_xform.path, 'gene_modules.csv',
                                              full.names=TRUE, recursive=TRUE)
u_stability_xform.modules <- lapply(u_stability_modules_xform.files, read_U, type="par") %>% 
  bind_rows() %>%
  dplyr::rename(transformation=k)

Heatmap of U for different transformations

# Plot U for different k's
u_stability_xform.cor <- plot_U(u_stability_xform.modules, "transformation", "rep")
transformation = exprs

transformation = log_exprs

transformation = min_max_norm

transformation = none

transformation = paper_norm

Mantel test for different transformations

plot_metan_mantel(u_stability_xform.cor, "transformation")
transformation = exprs

transformation = log_exprs

transformation = min_max_norm

transformation = none

transformation = paper_norm

The boxplot shows a summary of the above mantel test results, by only looking at the pairwise correlations between any two matrices for each transformation.

u_stability_xform.mantelBoxplot <- plot_mantel_boxplot(u_stability_xform.cor, "transformation", 
                                                       to_int=FALSE)
print(u_stability_xform.mantelBoxplot)

Dictionary size of U (number of modules in U) (k=1, iter=10, paper_norm)

# Define path for results of different k's (sparsity)
u_stability_dict.path <- file.path(u_stability_par.path, "dictSize")

# Read in segmentation SCE results
u_stability_modules_dict.files <- list.files(u_stability_dict.path, 'gene_modules.csv',
                                              full.names=TRUE, recursive=TRUE)
u_stability_dict.modules <- lapply(u_stability_modules_dict.files, read_U, type="par") %>% 
  bind_rows() %>%
  dplyr::rename(dictionary_size=k)

Heatmap of U for different dictionary sizes

# Plot U for different k's
u_stability_dict.cor <- plot_U(u_stability_dict.modules, "dictionary_size", "rep")
dictionary_size = 10

dictionary_size = 100

dictionary_size = 20

dictionary_size = 30

dictionary_size = 40

dictionary_size = 50

dictionary_size = 60

dictionary_size = 70

dictionary_size = 80

dictionary_size = 90

Mantel test for different dictionary sizes

plot_metan_mantel(u_stability_dict.cor, "dictionary_size")
dictionary_size = 10

dictionary_size = 100

dictionary_size = 20

dictionary_size = 30

dictionary_size = 40

dictionary_size = 50

dictionary_size = 60

dictionary_size = 70

dictionary_size = 80

dictionary_size = 90

The boxplot shows a summary of the above mantel test results, by only looking at the pairwise correlations between any two matrices for each dictionary size.

u_stability_dict.mantelBoxplot <- plot_mantel_boxplot(u_stability_dict.cor, "dictionary_size", 
                                                       to_int=TRUE)
print(u_stability_dict.mantelBoxplot)